02-TIMCI-SPA-cgei Audit Analysis

Lucas Silbernagel

23 September, 2021

library(dplyr)
library(readr)
library(ggplot2)
library(openxlsx)
library(knitr)
library(tibble)
library(stringr)
library(stringi)
library(readxl)
library(lubridate)
library(shiny)
library(plotly)

1 Loading the Data and Removal of Training Data

# Pull the ODK CSV out of the zip archive and keep it as a plain data frame
df <- as.data.frame(
  extract_data_from_odk_zip(params$file_path_zip, params$file_name_csv)
)

# Convert the start/end columns from integer milliseconds to time stamps
df[["start"]] <- format_date_ms(df[["start"]])
df[["end"]] <- format_date_ms(df[["end"]])

# Keep only events that started after 18 July 2021
# (rows whose start date is NA are dropped as well)
df <- df[
  which(as.Date(df$start) > as.Date("18.07.2021", format = "%d.%m.%Y")),
  ,
  drop = FALSE
]

2 Deriving New Features

2.1 Time Spent per Event

# Time spent on each event in whole seconds (end minus start).
# The unit is requested explicitly: a plain `end - start` lets R choose the
# unit (secs, mins, hours, ...) from the data, so as.numeric() on that result
# is not guaranteed to be in seconds.
df$time_spent <- round(as.numeric(difftime(df$end, df$start, units = "secs")))

2.2 Question

# Reduce every node path to the bare question name, one node at a time
# (create_question() is a project helper defined outside this section)
df$question <- sapply(X = df$node, FUN = create_question)

2.3 Question Decoded

# Decode the question names using the codebook passed in via params.
# NOTE(review): decode_question() is defined elsewhere; judging by the data
# preview it adds the `question_decoded` column (question label text) - confirm.
df <- decode_question(df, df$question, params$codebook)

2.4 Categorical Answers Decoded

# Decode the categorical answers using the codebook passed in via params.
# NOTE(review): decode_categories() is defined elsewhere; judging by the data
# preview it adds `new_value_decoded` and `old_value_decoded` - confirm.
df <- decode_categories(df, params$codebook)

2.5 Time until a Response was Changed + Stream of Answer Changes

# Order the events so that successive events of the same question within one
# instance are adjacent and chronological, then add the two feature columns.
df <- df %>%
  arrange(`instance ID`, node, start) %>%
  # seconds between the end of the previous event and the start of the event
  # that overwrote its answer; NA where no answer change was detected
  add_column(time_till_change = NA_real_) %>%
  # placeholder column: it is created here but not populated in this section
  add_column(changed_from = NA)

# An event is treated as an answer change when its old value equals the new
# value of the directly preceding event of the SAME instance and node.
# Restricting to the same instance/node prevents false matches across question
# boundaries, and starting at row 2 avoids indexing a non-existent row 0.
n_events <- nrow(df)
if (n_events > 1) {
  cur <- seq.int(2L, n_events)
  prev <- cur - 1L

  same_question <- df$`instance ID`[cur] == df$`instance ID`[prev] &
    df$node[cur] == df$node[prev]
  value_carried_over <- df$`old-value`[cur] == df$`new-value`[prev]

  # which() discards both FALSE and NA, so rows with a missing old or new
  # value are never counted as a change
  changed <- cur[which(same_question & value_carried_over)]

  if (length(changed) > 0) {
    # Explicit unit: a bare POSIXct subtraction chooses its unit automatically,
    # so gaps of a minute or more would not be expressed in seconds.
    df$time_till_change[changed] <- round(as.numeric(
      difftime(df$start[changed], df$end[changed - 1L], units = "secs")
    ))
  }
}

2.6 Preview and Summary of the Final Data

# Show the first six rows of the prepared data
head(df, n = 6L)
instance ID event node start end latitude longitude accuracy old-value new-value time_spent question question_decoded new_value_decoded old_value_decoded time_till_change changed_from
uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 group questions /data/a1 2021-08-30 13:02:55 2021-08-30 13:03:25 NA NA NA NA NA 29 a1 NA NA NA NA NA
uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 question /data/a1/a1_a_4a 2021-08-30 13:02:55 2021-08-30 13:03:25 NA NA NA NA T-F0014-P0223 29 a1_a_4a If QR code scanning is not possible, please manually enter the participant identification code T-F0014-P0223 NA NA NA
uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 group questions /data/b1 2021-08-30 13:02:54 2021-08-30 13:02:55 NA NA NA NA NA 1 b1 NA NA NA NA NA
uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 group questions /data/b2 2021-08-30 13:07:05 2021-08-30 13:08:32 NA NA NA NA NA 86 b2 NA NA NA NA NA
uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 group questions /data/b2 2021-08-30 13:08:34 2021-08-30 13:09:24 NA NA NA NA NA 50 b2 NA NA NA NA NA
uuid:00021afc-7e8e-40c4-82ba-66b2a2a60496 question /data/b2/b1_7 2021-08-30 13:07:05 2021-08-30 13:08:32 NA NA NA NA 1 86 b1_7 Is this facility the closest health facility to your home? Yes NA NA NA
# Column-wise summary of the prepared data
summary(object = df)
##  instance ID           event               node               start                    
##  Length:14117       Length:14117       Length:14117       Min.   :2021-07-20 11:55:08  
##  Class :character   Class :character   Class :character   1st Qu.:2021-07-28 13:38:51  
##  Mode  :character   Mode  :character   Mode  :character   Median :2021-08-20 11:40:39  
##                                                           Mean   :2021-08-14 02:07:37  
##                                                           3rd Qu.:2021-08-26 09:46:43  
##                                                           Max.   :2021-08-31 18:51:19  
##                                                                                        
##       end                      latitude       longitude      accuracy      
##  Min.   :2021-07-20 11:55:09   Mode:logical   Mode:logical   Mode:logical  
##  1st Qu.:2021-07-28 13:40:22   NA's:14117     NA's:14117     NA's:14117    
##  Median :2021-08-20 11:26:44                                               
##  Mean   :2021-08-14 00:59:12                                               
##  3rd Qu.:2021-08-26 09:44:05                                               
##  Max.   :2021-08-31 18:51:18                                               
##  NA's   :1986                                                              
##   old-value          new-value           time_spent       question        
##  Length:14117       Length:14117       Min.   :   0.0   Length:14117      
##  Class :character   Class :character   1st Qu.:   6.0   Class :character  
##  Mode  :character   Mode  :character   Median :  17.0   Mode  :character  
##                                        Mean   :  33.2                     
##                                        3rd Qu.:  34.0                     
##                                        Max.   :9537.0                     
##                                        NA's   :1986                       
##  question_decoded   new_value_decoded  old_value_decoded  time_till_change
##  Length:14117       Length:14117       Length:14117       Min.   : 1.00   
##  Class :character   Class :character   Class :character   1st Qu.: 2.00   
##  Mode  :character   Mode  :character   Mode  :character   Median : 5.00   
##                                                           Mean   :12.10   
##                                                           3rd Qu.:14.25   
##                                                           Max.   :54.00   
##                                                           NA's   :14037   
##  changed_from  
##  Mode:logical  
##  NA's:14117    
##                
##                
##                
##                
## 

3 General Information about the Data

# Headline figures for the report: number of distinct instances, number of
# events, and the first/last calendar day covered by the data
no_inst <- length(unique(df$`instance ID`))
no_event <- nrow(df)
earliest_start <- as.Date(min(df$start))
# missing end times are ignored when looking for the latest one
latest_end <- as.Date(max(df$end, na.rm = TRUE))

Total number of instances: 307
Total number of events/questions: 14117
Examination period: 2021-07-20 - 2021-08-31

4 Grouped by Time

4.1 Events/Questions Started by Day

# Number of events/questions started on each calendar day
df_by_day <- df %>%
  mutate(start_date = as.Date(start)) %>%
  count(start_date, name = "count")

# Daily counts as a line, overlaid with a red LOESS trend line
# (se = FALSE: no confidence band)
gg1 <- ggplot(df_by_day, aes(x = start_date, y = count)) +
  geom_line() +
  geom_smooth(alpha = 0.5, colour = "red", method = "loess", se = FALSE) +
  labs(
    title = "Number of Events/Questions Started by Day with Smoothed Regression Line",
    y = "Number of Questions/Events Started",
    x = "Start Date"
  ) +
  theme_light()
gg1

4.2 Questions/Events Started by Weekday and Hour of the Day

# Count started events for every (weekday, hour-of-day) combination;
# weeks start on Monday
df_wday_hour <- df %>%
  mutate(
    wday = wday(start, label = TRUE, week_start = 1),
    hour = hour(start)
  ) %>%
  count(wday, hour, name = "count_wday_hour") %>%
  arrange(desc(wday))

# Minimal heat-map theme: no grid, border, ticks, x-axis title or legend
theme_heatmap <- theme_light() +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 8),
    plot.title = element_text(face = "bold", size = 11, hjust = 0.5)
  )

# Heat map: weekdays along the top, hours running downwards, each tile
# labelled with its count
gg2 <- ggplot(
  df_wday_hour,
  aes(x = wday, y = hour, fill = count_wday_hour)
) +
  geom_tile(colour = "white") +
  geom_text(aes(label = count_wday_hour), size = 2) +
  scale_fill_gradient(low = "#fff0f0", high = "#940606") +
  scale_x_discrete(expand = c(0, 0), position = "top") +
  scale_y_reverse(breaks = 23:0, labels = 23:0, expand = c(0, 0)) +
  labs(
    title = "Number of Started Events/Questions by Day of Week / Hour of Day",
    y = "Hour of Day"
  ) +
  theme_heatmap
gg2

4.3 Distribution of Time Spent per Event/Question with largest 5 % removed

# Keep the events whose time spent lies below the 95th percentile
# (events with a missing time_spent are dropped too)
df_clean <- df[
  which(df$time_spent < quantile(df$time_spent, probs = 0.95, na.rm = TRUE)),
  ,
  drop = FALSE
]

# Histogram of the remaining times, converted to minutes
hist(
  with(df_clean, time_spent[!is.na(time_spent)] / 60),
  breaks = 20,
  main = "Histogram of the Time Spent by Question",
  xlab = "Time Spent in Minutes"
)

5 Aggregated by Event/Question

5.1 Median Time Spent by Question

# Median time spent per decoded question, longest first, shown as a period.
# Only rows whose event type is "question" are considered.
df_median_time_per_question <- filter(df, event == "question")
df_median_time_per_question <- group_by(
  df_median_time_per_question,
  question_decoded
)
df_median_time_per_question <- summarise(
  df_median_time_per_question,
  median_time_spent = median(time_spent)
)
# sort while the value is still numeric, then convert it for display
df_median_time_per_question <- arrange(
  df_median_time_per_question,
  desc(median_time_spent)
)
df_median_time_per_question <- mutate(
  df_median_time_per_question,
  median_time_spent = round(seconds_to_period(median_time_spent))
)

df_median_time_per_question
question_decoded median_time_spent
Were you given a paper or record to take with you for completing the referral? 1M 34S
Were you told why to go? 1M 34S
What do you intend to do now? 1M 34S
When do you need to complete the referral? 1M 34S
Can you specify these signs and symptoms? 53S
Were you told where to go? 41S
If QR code scanning is not possible, please manually enter the participant identification code 30S
Did the provider speak in a language you understand? 28S
Did you feel the provider treated you and the child with respect? 28S
Did you find the provider showed concern and empathy? 28S
Did you find the provider was kind to you? 28S
How do you feel overall with the service you received at the facility today? 28S
Was the service delayed or were you kept waiting for a long time? 28S
Would you recommend this facility to a friend / family with a sick child? 28S
Did you pay for something at the facility today? 26S
Did you miss work to bring the child to the facility today? 26S
Do you intend to buy some medicines outside of the facility? 26S
Is this facility the closest health facility to your home? 26S
Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? 26S
What do you intend to do if the sick child does not get completely better or become worse? 26S
Were you given general information or advice about feeding or breastfeeding? 25S
Can you explain to me why this device was used? 24S
Please scan the participant’s QR code 18S
Can you show me all the medicines and prescriptions that you received? 17S
Did the provider explain to you how to give these medicines to the child at home? 17S
How confident do you feel in how much of the medication to give each day and how many days to give it? 17S
How did you feel with the fact that the provider used of a tablet for the consultation of the child? 16S
Did the provider explain to you the result that was given by the device? 14S
Did the provider give or prescribe any medicines for the child to take home? 13S
Did the provider refer the child? 13S
Did the provider tell you what illness your child has? 13S
Please select the current district 12S
Did the provider use the device that is represented in the following picture during the consultation of the child? 9S
fcode 9S
Did the provider use a tablet like this one for the consultation of the child? 5S

5.2 Count of Input Changes and Median Time until Input was Changed by Question

# Per decoded question: how often an answer was changed, plus the median and
# standard deviation of the time until the change. Questions with only a
# single change are left out of the table.
df_changes_per_question <- df %>%
  filter(event == "question") %>%
  filter(!is.na(time_till_change)) %>%
  group_by(question_decoded) %>%
  summarise(
    count_input_changes = n(),
    median_time_till_change = median(time_till_change),
    sd_time_till_change = sd(time_till_change)
  ) %>%
  arrange(desc(count_input_changes)) %>%
  # convert to periods for display only after sorting
  mutate(
    median_time_till_change = round(seconds_to_period(median_time_till_change)),
    sd_time_till_change = round(seconds_to_period(sd_time_till_change), 1)
  ) %>%
  filter(count_input_changes > 1)

df_changes_per_question
question_decoded count_input_changes median_time_till_change sd_time_till_change
Did the provider explain to you how to give these medicines to the child at home? 10 4S 16.8S
Can you show me all the medicines and prescriptions that you received? 9 8S 17.1S
Was the service delayed or were you kept waiting for a long time? 7 5S 16S
Do you intend to buy some medicines outside of the facility? 6 11S 8.4S
How confident do you feel in how much of the medication to give each day and how many days to give it? 6 4S 19.1S
How do you feel overall with the service you received at the facility today? 5 5S 11.6S
If QR code scanning is not possible, please manually enter the participant identification code 5 22S 23.6S
Would you recommend this facility to a friend / family with a sick child? 5 6S 13.6S
Did you pay for something at the facility today? 4 3S 1.6S
Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? 4 24S 19.4S
Were you given general information or advice about feeding or breastfeeding? 3 2S 1.2S
Can you explain to me why this device was used? 2 2S 0S
Did the provider tell you what illness your child has? 2 4S 2.1S
Did the provider use the device that is represented in the following picture during the consultation of the child? 2 6S 2.1S
Did you feel the provider treated you and the child with respect? 2 16S 19.8S
Did you find the provider showed concern and empathy? 2 2S 1.4S

5.3 Count of Old-New Value Pairs

# How often each (question, old answer, new answer) combination occurs among
# the detected answer changes; combinations seen only once are dropped
df_stream <- df %>%
  filter(!is.na(time_till_change)) %>%
  count(
    question_decoded,
    old_value_decoded,
    new_value_decoded,
    name = "count_value_pairs"
  ) %>%
  arrange(desc(count_value_pairs)) %>%
  filter(count_value_pairs > 1)

df_stream
question_decoded old_value_decoded new_value_decoded count_value_pairs
Can you show me all the medicines and prescriptions that you received? All medicines received, no unfilled prescriptions Some medicines and some unfilled prescriptions 4
Did the provider explain to you how to give these medicines to the child at home? Yes, for all medicines Yes, but only for some medicines 3
Would you recommend this facility to a friend / family with a sick child? Strongly agree Agree 3
Can you show me all the medicines and prescriptions that you received? Some medicines and some unfilled prescriptions Prescriptions only, no medicines 2
Did the provider explain to you how to give these medicines to the child at home? Yes, but only for some medicines Yes, for all medicines 2
Did the provider explain to you how to give these medicines to the child at home? Yes, for all medicines No 2
Did you feel the provider treated you and the child with respect? Agree Strongly agree 2
Did you pay for something at the facility today? No Yes 2
Do you intend to buy some medicines outside of the facility? No Yes, prescribed by the healthcare provider but not available at the facility 2
How confident do you feel in how much of the medication to give each day and how many days to give it? Very confident Neutral 2
How do you feel overall with the service you received at the facility today? Very satisfied Somewhat satisfied 2
Was the service delayed or were you kept waiting for a long time? Agree Neither agree nor disagree 2
Was the service delayed or were you kept waiting for a long time? Strongly agree Disagree 2
Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? Yes No 2

6 Aggregated by Instance

6.1 Top 10 % of Duration by Instance

# Total duration of every instance (latest end minus earliest start) and the
# instances above the 90th percentile of that duration, longest first.
df_duration_per_inst <- df %>%
  group_by(`instance ID`) %>%
  summarise(
    # Explicit unit: a bare POSIXct subtraction picks secs/mins/hours/days per
    # group, while seconds_to_period() below reads the raw number as seconds.
    duration_per_inst = difftime(
      max(end, na.rm = TRUE),
      min(start, na.rm = TRUE),
      units = "secs"
    )
  ) %>%
  filter(duration_per_inst > quantile(duration_per_inst, 0.9, na.rm = TRUE)) %>%
  mutate(
    duration_per_inst = round(
      seconds_to_period(as.numeric(duration_per_inst, units = "secs"))
    )
  ) %>%
  arrange(desc(duration_per_inst))

df_duration_per_inst
instance ID duration_per_inst
uuid:50e579ce-3dde-43f0-9aec-d524233cfcb0 13d 8H 41M 3S
uuid:92b7bd54-b06e-4c24-b7eb-44ef3de7d10f 3d 2H 19M 12S
uuid:9b0ac8c9-6a22-441b-aad3-0df639715b21 20H 51M 60S
uuid:894e09b4-b086-4f3d-b5ad-9b37c3d7db5e 10H 3M 3S
uuid:f0797482-3b4c-49f8-ac80-362fb9f4fb06 8H 45M 29S
uuid:90e19e6b-9fc5-4776-af28-921a55c7664d 8H 36M 34S
uuid:096ab426-0473-442d-8441-d661ce7d2ec2 8H 32M 42S
uuid:5eb6622a-9327-4d22-b580-016d9913a435 8H 30M 42S
uuid:b9e5bde0-3ba1-4e54-921f-ead49247c45f 8H 28M 30S
uuid:d4d74cf1-e3db-42f5-9a9d-fbc463ba9abf 8H 0M 54S
uuid:0c971016-85d3-4892-998c-e7b3f0125309 7H 56M 37S
uuid:a60e6235-ec17-4730-8eff-37c764cd77d8 7H 50M 20S
uuid:1a1ac120-825c-4edf-a418-43674dd58c40 7H 37M 26S
uuid:1e70b4cc-4d97-4697-96b9-f89b5cc84bb4 7H 35M 31S
uuid:9da2333c-6ff3-4f6d-9f86-b8438195bc73 7H 33M 26S
uuid:bae4f3d0-c176-4f02-8f9c-c4cd88819f11 7H 28M 17S
uuid:d352bd5c-335c-44d3-9ae1-7c7871bcb28e 7H 11M 54S
uuid:8c88164d-b0e7-4f35-8e59-56c237eb5330 6H 59M 27S
uuid:b86b9b2a-7920-47e4-8008-05e6f3b2fd72 6H 58M 55S
uuid:e4ef13de-9892-48e0-8b80-3a3fd7a157b9 6H 55M 59S
uuid:cfe21b8e-3b41-4907-b591-90b4c390e124 6H 1M 32S
uuid:25c32682-b91c-4e73-accf-fc3a09adae30 5H 45M 47S
uuid:a0a371b0-dcaf-4f8c-9dd5-919203561784 5H 39M 52S
uuid:272b75c7-c69e-4fa0-91c2-262cab9f50f0 5H 36M 29S
uuid:82fa132a-248a-4a56-8079-1a33d7aa6ed9 5H 33M 0S
uuid:538512f3-d12e-4502-9d64-8034df81fb62 5H 29M 8S
uuid:9d162283-ea8a-460b-b851-7df408406ede 5H 26M 42S
uuid:fc3d50fe-4d9a-4708-8aeb-277e2c660866 5H 22M 23S
uuid:1c0f8e5d-b732-477f-8d43-c5f8753c61c5 5H 22M 16S
uuid:5842458a-51ee-47d4-92dd-c860d6bc871d 5H 21M 59S
uuid:3881af79-ebe5-4ec7-ace6-fd2d7092fccc 5H 14M 51S

6.2 Distribution of Duration by Instance with Top 10 % excluded

# Duration of every instance (latest end minus earliest start), keeping only
# the instances below the 90th percentile.
df_subsetted <- df %>%
  group_by(`instance ID`) %>%
  summarise(
    # Explicit unit so that every group is measured in seconds rather than in
    # whatever unit R would pick automatically for that group.
    duration_per_inst = difftime(
      max(end, na.rm = TRUE),
      min(start, na.rm = TRUE),
      units = "secs"
    )
  ) %>%
  filter(duration_per_inst < quantile(duration_per_inst, 0.9, na.rm = TRUE))

# Plot in minutes; the conversion is requested by unit instead of dividing by
# 60, which would only be correct if the values happened to be seconds.
hist(
  as.numeric(df_subsetted$duration_per_inst, units = "mins"),
  breaks = 30,
  main = "Duration per Instance in Minutes (outliers removed)",
  xlab = "Duration in Minutes"
)

7 Irregularities and Outliers

7.1 Time Till Change Outliers (computed on all data, without prior outlier removal)

# Answer changes whose delay exceeds the 90th percentile of all observed
# delays, slowest first, with the delay shown as a period
df_time_till_change_outliers <- df %>%
  select(
    `instance ID`,
    question_decoded,
    old_value_decoded,
    new_value_decoded,
    time_till_change
  ) %>%
  filter(
    time_till_change > quantile(time_till_change, probs = 0.9, na.rm = TRUE)
  ) %>%
  arrange(desc(time_till_change)) %>%
  mutate(time_till_change = round(seconds_to_period(time_till_change)))

df_time_till_change_outliers
instance ID question_decoded old_value_decoded new_value_decoded time_till_change
uuid:9abf7a26-9060-43de-a344-bad7ae1ecb1c What do you intend to do if the sick child does not get completely better or become worse? Return to this facility Not sure 54S
uuid:1c0f8e5d-b732-477f-8d43-c5f8753c61c5 If QR code scanning is not possible, please manually enter the participant identification code T-F0014-P0150 T-F0014-P0222 50S
uuid:fc3d50fe-4d9a-4708-8aeb-277e2c660866 If QR code scanning is not possible, please manually enter the participant identification code T-F0014-P0146 T-F0014-P0218 48S
uuid:388e775f-8ecc-4271-94e7-9c1d079af8a8 Was the service delayed or were you kept waiting for a long time? Agree Neither agree nor disagree 46S
uuid:46717996-f9dd-403b-9f85-2e519d1b0939 Can you show me all the medicines and prescriptions that you received? Prescriptions only, no medicines Some medicines and some unfilled prescriptions 46S
uuid:46717996-f9dd-403b-9f85-2e519d1b0939 Did the provider explain to you how to give these medicines to the child at home? No Yes, but only for some medicines 46S
uuid:46717996-f9dd-403b-9f85-2e519d1b0939 How confident do you feel in how much of the medication to give each day and how many days to give it? Neutral Very confident 46S
uuid:46717996-f9dd-403b-9f85-2e519d1b0939 Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? Yes No 46S

7.2 Histograms of Instances with Inconsistent Filling Behaviour

# An instance is flagged as irregular when the start times of its events,
# cut into 10 intervals over the instance's own time range, fall into fewer
# than 5 distinct intervals.
instance_ids <- unique(df$`instance ID`)
is_irregular <- vapply(
  instance_ids,
  function(id) {
    bin_vec <- cut(
      df$start[df$`instance ID` == id],
      breaks = 10,
      labels = FALSE
    )
    length(unique(bin_vec)) < 5
  },
  logical(1)
)
# Select the flagged ids in one step instead of growing a vector in a loop
irregular_inst <- instance_ids[is_irregular]
paste0(length(irregular_inst), " out of ", length(instance_ids), " instances were found to have an inconsistent filling behaviour.")
## [1] "198 out of 307 instances were found to have an inconsistent filling behaviour."
# For every irregular instance: cut its events into ten labelled time slices,
# add the slice histogram to one overlaid plotly figure, and remember which
# questions fall into the last slice.
fig <- plot_ly(alpha = 0.1)
last_bin_by_inst <- vector("list", length(irregular_inst))
for (k in seq_along(irregular_inst)) {
  inst_id <- irregular_inst[[k]]
  inst_events <- df[df$`instance ID` == inst_id, ]
  inst_events$cut <- cut(
    inst_events$start,
    breaks = 10,
    labels = c(
      "1. Part", "2. Part", "3. Part", "4. Part", "5. Part",
      "6. Part", "7. Part", "8. Part", "9. Part", "10. Part"
    )
  )
  fig <- add_histogram(fig, x = inst_events$cut, name = inst_id)

  last_bin_by_inst[[k]] <-
    inst_events$question_decoded[inst_events$cut == "10. Part"]
}
last_bin_questions <- unlist(last_bin_by_inst)
fig <- layout(fig, barmode = "overlay")
fig
# Frequency table of the questions found in the last slice, most frequent first
kable(arrange(as.data.frame(table(last_bin_questions)), desc(Freq)))
last_bin_questions Freq
Did you pay for something at the facility today? 13
Did you miss work to bring the child to the facility today? 11
Do you intend to buy some medicines outside of the facility? 11
Is this facility the closest health facility to your home? 10
Was the service delayed or were you kept waiting for a long time? 9
Would you recommend this facility to a friend / family with a sick child? 8
Can you show me all the medicines and prescriptions that you received? 7
Did the provider explain to you how to give these medicines to the child at home? 7
Did the provider speak in a language you understand? 6
Did you find the provider showed concern and empathy? 6
Did you find the provider was kind to you? 6
How confident do you feel in how much of the medication to give each day and how many days to give it? 6
Did you feel the provider treated you and the child with respect? 5
How do you feel overall with the service you received at the facility today? 5
Were you informed of signs / symptoms that require you to bring the child back to the facility immediately? 5
What do you intend to do if the sick child does not get completely better or become worse? 5
Can you specify these signs and symptoms? 4
Were you given general information or advice about feeding or breastfeeding? 4
Did the provider give or prescribe any medicines for the child to take home? 3
Did the provider refer the child? 3
Did the provider tell you what illness your child has? 3
If QR code scanning is not possible, please manually enter the participant identification code 3
Can you explain to me why this device was used? 2
Did the provider use a tablet like this one for the consultation of the child? 2
Did the provider use the device that is represented in the following picture during the consultation of the child? 2
How did you feel with the fact that the provider used of a tablet for the consultation of the child? 2